Matrices

Input matrix:

  • $X \in R^{N \times p}$ (input matrix of $N$ observations and $p$ features)

Decomposition matrices:

  • $F \in R^{N \times r}$
  • $G \in R^{r \times p}$
  • $W \in R^{p \times r}$

NMF

Definition:

\begin{equation} X \approx F G \end{equation}

C-NMF

Definition:

\begin{equation} X \approx F G = X W G \end{equation}

where the columns of $F = X W$ are convex combinations of the features of $X$.


In [27]:
import pymf
import sys
import msaf
from msaf import utils as U
from msaf import jams2
import msaf.algorithms.cnmf3.segmenter as S
import msaf.algorithms.foote.segmenter as Foote

fig_dir = "/Users/uri/Dropbox/NYU/Dissertation/manuscript/4/figures/"

In [28]:
def cnmf(X, r, niter, fig=False, aspect="auto"):
    nmf_mdl = pymf.CNMF(X, num_bases=r)
    nmf_mdl.factorize(niter=niter)
    F = np.asarray(nmf_mdl.W)
    G = np.asarray(nmf_mdl.H)
    W = np.asarray(nmf_mdl.G)
    
    if fig:
        plt.figure(figsize=(10,4))
        plt.gcf().suptitle("C-NMF")
        plt.subplot(1,3,1); plt.imshow(F, aspect=aspect, interpolation="nearest"); plt.title("F");
        plt.subplot(1,3,2); plt.imshow(G, aspect=aspect, interpolation="nearest"); plt.title("G"); 
        plt.subplot(1,3,3); plt.imshow(F * G, aspect=aspect, interpolation="nearest"); plt.title("X = F * G"); 
        plt.show()
        plt.figure(figsize=(10,5))
        plt.subplot(1,4,1); plt.imshow(X, aspect=aspect, interpolation="nearest"); plt.title("X");
        plt.subplot(1,4,2); plt.imshow(W, aspect=aspect, interpolation="nearest"); plt.title("W");
        plt.subplot(1,4,3); plt.imshow(np.asmatrix(X) * W, aspect=aspect, interpolation="nearest"); plt.title("F = X * W"); 
        plt.show()
    
    return F, G, W

def nmf(X, r, niter, fig=False, aspect="auto"):
    nmf_mdl = pymf.NMF(X, num_bases=r)
    nmf_mdl.factorize(niter=niter)
    F = np.asarray(nmf_mdl.W)
    G = np.asarray(nmf_mdl.H)
    
    if fig:
        plt.figure(figsize=(10,4))
        plt.gcf().suptitle("NMF")
        plt.subplot(1,3,1); plt.imshow(F, aspect=aspect, interpolation="nearest"); plt.title("F");
        plt.subplot(1,3,2); plt.imshow(G, aspect=aspect, interpolation="nearest"); plt.title("G"); 
        plt.subplot(1,3,3); plt.imshow(F * G, aspect=aspect, interpolation="nearest"); plt.title("X = F * G");
    
    return F, G
    
def read_ssm(file_path):
    m = 13
    dist = "correlation"
    chroma, mfcc, tonnetz, beats, dur, anal = msaf.io.get_features(file_path, annot_beats=False)
    F = U.lognormalize_chroma(chroma)
    F = S.median_filter(F, M=m)
    X = Foote.compute_ssm(F, metric=dist)
    return X

def read_chroma(file_path, m=13):
    dist = "correlation"
    chroma, mfcc, tonnetz, beats, dur, anal = msaf.io.get_features(file_path, annot_beats=False)
    F = U.lognormalize_chroma(chroma)
#     F = F ** 2
    F = S.median_filter(F, M=m)
    return F

In [29]:
from scipy.cluster.vq import whiten, vq, kmeans

def get_kmeans_bounds(X, k, niter):
    codebook, dist = kmeans(X, k, iter=niter)
    codes, disto = vq(X, codebook)
    # Find points where there's a cluster difference (boundary)
    b = np.where(np.diff(codes) != 0)[0] + 1
    return np.asarray(b)

In [26]:
N = 10
p = 6
r = 4
niter = 100
fig = True
aspect = 'auto'

np.random.seed(1)
X = np.random.random((N,p))
X = np.ones((N,p))
X[:4,:] = np.arange(p)
X[4:7,:] = np.arange(0,p*2,2)
X[7:,:] = np.arange(p)
# X[7:,:] = np.arange(0,p*3,3)
X = X.T
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_01_-_Help!.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_01_-_Come_Together.mp3")
X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_05_-_And_I_Love_Her.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_06_-_Tell_Me_Why.mp3")

# Factorize
F, G, W = cnmf(X, r, niter, fig=fig, aspect=aspect)
F, G = nmf(X, r, niter, fig=fig, aspect=aspect)

# Find boundaries using k-means
bounds = get_kmeans_bounds(X, r, niter)
print bounds


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-26-591dc9e80f6c> in <module>()
     20 
     21 # Factorize
---> 22 F, G, W = cnmf(X, r, niter, fig=fig, aspect=aspect)
     23 F, G = nmf(X, r, niter, fig=fig, aspect=aspect)
     24 

<ipython-input-25-72a9acb7c59a> in cnmf(X, r, niter, fig, aspect)
     20         plt.subplot(1,3,1); plt.imshow(F, aspect=aspect, interpolation="nearest"); plt.title("F");
     21         plt.subplot(1,3,2); plt.imshow(G, aspect=aspect, interpolation="nearest"); plt.title("G");
---> 22         plt.subplot(1,3,3); plt.imshow(F * G, aspect=aspect, interpolation="nearest"); plt.title("X = F * G");
     23         plt.show()
     24         plt.figure(figsize=(10,5))

ValueError: operands could not be broadcast together with shapes (138,4) (4,138) 

In [4]:
# Experiment against consistency

X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_01_-_Help!.mp3")
rank = 2
niter = 70
N = 100

def distNMF( x1, x2, y1, y2 ):
    min1 = distMat(x1, y1)
    min2 = distMat(x1, y2)
    if min1 < min2:
        m1 = min1
        m2 = distMat(x2, y2)
    else:
        m1 = min2
        m2 = distMat(x2, y1)
    return m1 + m2

def distMat( X, Y ):
    #return distance.euclidean(X,Y)
    return np.linalg.norm( X - Y )

CNMF = []
NMF = []

for i in range(N):
    cnmf = pymf.CNMF(X, num_bases=rank)
    cnmf.factorize(niter=niter)
    Wcnmf = np.asmatrix(cnmf.W)
    Hcnmf = np.asmatrix(cnmf.H)

    nmf = pymf.NMF(X, num_bases=rank)
    nmf.factorize(niter=niter)
    Wnmf = np.asmatrix(nmf.W)
    Hnmf = np.asmatrix(nmf.H)

    for r in range(rank):
        CNMF.append(Wcnmf[:,r]*Hcnmf[r,:])
        NMF.append(Wnmf[:,r]*Hnmf[r,:])

# Build distance matrix
RC = np.zeros((N,N))
RN = np.zeros((N,N))
M = N*N/2 - N/2
RCC = np.zeros((M))
RNN = np.zeros((M))
k = 0
for i in range(N):
    for j in range(i):
        RC[i,j] = distNMF( CNMF[2*i], CNMF[2*i+1], CNMF[2*j], CNMF[2*j+1] )
        RN[i,j] = distNMF( NMF[2*i], NMF[2*i+1], NMF[2*j], NMF[2*j+1] )
        RCC[k] = RC[i,j]
        RNN[k] = RN[i,j]
        k += 1

# fig = 10
# fig = plt.figure(fig)
# plt.clf()
# ax = fig.add_subplot(2, 1, 1)
# ax.imshow( RC, interpolation='nearest' )
# ax.set_title("C-NMF")
# ax = fig.add_subplot(2, 1, 2)
# ax.imshow( RN, interpolation='nearest' )
# ax.set_title("NMF")

histRNN = np.bincount(RNN.astype(int)) + 1
histRCC = np.ones(histRNN.shape)
h = np.bincount(RCC.astype(int)) + 1
histRCC[:h.shape[0]] = h


#print "histRNN", histRNN
#print "histRCC", histRCC

fig = 11
fig = plt.figure(fig, figsize=(5, 3))
plt.clf()
ax = fig.add_subplot(1, 1, 1)
#ax.plot( RCC )
ax.plot( histRCC, label='C-NMF' )
#ax.plot( np.sort(RCC)[::-1], label='C-NMF' )
#ax.plot( RNN )
ax.plot( histRNN, label='NMF' )
#ax.plot( np.sort(RNN)[::-1], label='NMF' )
ax.set_title("Consistency of C-NMF vs NMF")
ax.set_xlabel("Euclidean distance")
ax.set_ylabel("Count")
ax.set_yscale('log')
ax.legend()
plt.savefig('consistency.pdf', bbox_inches='tight')
plt.show()



In [241]:
from matplotlib import gridspec

X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_01_-_Help!.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_01_-_I_Saw_Her_Standing_There.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_CD1_-_11_-_Black_Bird.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_05_-_Dig_It.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_05_-_And_I_Love_Her.mp3")
# X = read_chroma("/Users/uri/datasets/Segments/audio/Isophonics_01_-_Help!.mp3", m=9).T
X = read_chroma("/Users/uri/datasets/Segments/audio/Isophonics_05_-_And_I_Love_Her.mp3", m=9).T
# X = read_chroma("/Users/uri/datasets/Segments/audio/Isophonics_CD1_-_07_-_While_My_Guitar_Gently_Weeps.mp3", m=9).T

r = 4
niter = 300
fig = False
cmap = 'hot'

cF, cG, cW = cnmf(X, r, niter, fig=fig)
nF, nG = nmf(X, r, niter, fig=fig)

# plt.figure(figsize=(6,6))
# for rr in xrange(r):
#     cD = cF[:,rr] * cG[rr, :]
#     nD = nF[:,rr] * nG[rr, :]
#     print rr
#     plt.subplot(2,2,rr*2+1); plt.imshow(cD, interpolation="nearest")
#     plt.title("C-NMF")
#     plt.subplot(2,2,rr*2+2); plt.imshow(nD, interpolation="nearest")
#     plt.title("NMF")
# plt.show()

plt.figure(figsize=(14,4))
ax1 = plt.subplot2grid((5,14), (1,0), colspan=3, rowspan=3)
ax2 = plt.subplot2grid((5,14), (0,3), colspan=1, rowspan=5)
ax3 = plt.subplot2grid((5,14), (2,4), colspan=3, rowspan=1)
ax4 = plt.subplot2grid((5,14), (0,7), colspan=1, rowspan=5)
ax5 = plt.subplot2grid((5,14), (2,8), colspan=3, rowspan=1)
ax6 = plt.subplot2grid((5,14), (1,11), colspan=3, rowspan=3)
ax1.imshow(X, interpolation="nearest", aspect="auto", cmap=cmap)
ax1.set_title("X")
ax2.imshow(cW, interpolation="nearest", aspect="auto", cmap=cmap)
ax2.set_title("W")
ax2.set_xticks(xrange(r))
ax2.set_xticklabels(np.arange(r)+1)
ax2.text(-3.5, cW.shape[0]/2., r'$\times$', fontsize=15)
ax2.text(r - .25, cW.shape[0]/2., r'$\times$', fontsize=15)
ax3.imshow(cG, interpolation="nearest", aspect="auto", cmap=cmap)
ax3.set_yticks(xrange(r))
ax3.set_yticklabels(np.arange(r)+1)
ax3.set_title("G")
ax4.text(-3.5, cF.shape[0]/2. - 0.5, r'$=$', fontsize=15)
ax4.imshow(cF, interpolation="nearest", aspect="auto", cmap=cmap)
ax4.text(r - .25, cF.shape[0]/2. - 0.5, r'$\times$', fontsize=15)
ax4.set_title("F")
ax4.set_xticks(xrange(r))
ax4.set_xticklabels(np.arange(r)+1)
ax5.imshow(cG, interpolation="nearest", aspect="auto", cmap=cmap)
ax5.set_title("G")
ax5.set_yticks(xrange(r))
ax5.set_yticklabels(np.arange(r)+1)
ax6.imshow(cF * cG, interpolation="nearest", aspect="auto", cmap=cmap)
ax6.text(-25, nF.shape[0]/2., r'$=$', fontsize=15)
ax6.set_title("X'")
plt.tight_layout()
plt.savefig('cnmf-example.pdf', bbox_inches='tight')
plt.show()

plt.figure(figsize=(7, 4))
ax1 = plt.subplot2grid((5,7), (0,0), colspan=1, rowspan=5)
ax2 = plt.subplot2grid((5,7), (2,1), colspan=3, rowspan=1)
ax3 = plt.subplot2grid((5,7), (1,4), colspan=3, rowspan=3)
ax1.imshow(nF, interpolation="nearest", aspect="auto", cmap=cmap)
ax1.set_title("F")
ax1.set_xticks(xrange(r))
ax1.set_xticklabels(np.arange(r)+1)
ax1.text(r - 0.5, nF.shape[0]/2., r'$\times$', fontsize=15)
ax2.imshow(nG, interpolation="nearest", aspect="auto", cmap=cmap)
ax2.set_title("G")
ax2.set_yticks(xrange(r))
ax2.set_yticklabels(np.arange(r)+1)
ax3.imshow(nF * nG, interpolation="nearest", aspect="auto", cmap=cmap)
ax3.text(-20, nF.shape[0]/2., r'$=$', fontsize=15)
ax3.set_title("X'")
plt.tight_layout()
plt.savefig('nmf-example.pdf', bbox_inches='tight')
plt.show()



In [34]:
# Example of extraction of boundaries using C-NMF
# in_path = "/Users/uri/datasets/Segments/audio/Isophonics_05_-_And_I_Love_Her.mp3"
# in_path = "/Users/uri/datasets/Segments/audio/Isophonics_01_-_Come_Together.mp3"
# in_path = "/Users/uri/datasets/Segments/audio/Isophonics_03_-_Lucy_In_The_Sky_With_Diamonds.mp3"
# in_path = "/Users/uri/datasets/Segments/audio/Isophonics_CD1_-_10_-_I'm_So_Tired.mp3"
# in_path = "/Users/uri/datasets/Segments/audio/Isophonics_06_-_She's_Leaving_Home.mp3"
in_path = "/Users/uri/datasets/Segments/audio/Isophonics_08_-_Strawberry_Fields_Forever.mp3"

jam_file = in_path.replace(msaf.Dataset.audio_dir, msaf.Dataset.references_dir)
jam_file = jam_file.replace("mp3", "jams")
chroma, mfcc, tonnetz, beats, dur, anal = msaf.io.get_features(in_path, annot_beats=False)
h = 14
R = 10
X = read_chroma(in_path, m=h).T

ann_bounds = msaf.io.read_ref_bound_frames(in_path, beats)
ann_inter, ann_labels = jams2.converters.load_jams_range(jam_file,
                "sections", annotator=0, context=msaf.prefix_dict["Isophonics"])

print "Unique segments:", len(np.unique(ann_labels))
print "Annotated bounds: %s" % ann_bounds
print "Annotated labels: %s" % ann_labels
# logging.info("Estimated bounds: %s" % bound_idxs)

r = 4
niter = 300
fig = False
cmap = 'hot'

F, G, W = cnmf(X, r, niter, fig=fig)

G = G.T
orG = np.copy(G)
idx = np.argmax(G, axis=1)
max_idx = np.arange(G.shape[0])
max_idx = (max_idx, idx.flatten())
G[:, :] = 0
G[max_idx] = idx + 1

# TODO: Order matters?
oG = np.copy(G)
G = np.sum(G, axis=1)
mG = np.copy(G)
G = S.median_filter(G[:, np.newaxis], R)

bound_idxs = np.where(np.diff(G.flatten()) != 0)[0] + 1

# plt.imshow(X, interpolation="nearest", aspect="auto", cmap="hot"); plt.show()

plt.figure(figsize=(10, 5))
plt.subplot(2, 2, 1)
plt.imshow(orG.T, interpolation="nearest", aspect="auto", cmap="hot")
plt.title("(a) \n $G$")
plt.yticks(np.arange(r))
plt.xlabel("Time Frames")
plt.gca().set_yticklabels(np.arange(r)+1)
plt.subplot(2, 2, 2)
plt.imshow(oG.T, interpolation="nearest", aspect="auto", cmap="hot")
plt.title("(b) \n $\mathcal{G}$")
plt.yticks(np.arange(r))
plt.xlabel("Time Frames")
plt.gca().set_yticklabels(np.arange(r)+1)
plt.subplot(2, 2, 3)
plt.imshow(mG[:,np.newaxis].T, interpolation="nearest", aspect="auto", cmap="hot")
plt.title("(c) \n $\mathbf{g}$")
plt.yticks([])
plt.xlabel("Time Frames")
plt.subplot(2, 2, 4)
plt.imshow(G.T, interpolation="nearest", aspect="auto", cmap="hot")
plt.title("(d) \n $\mathbf{g}'$")
plt.yticks([])
plt.xlabel("Time Frames")
for ann_bound in ann_bounds[2:-2]:
    plt.axvline(ann_bound, ymin=0, ymax=1, linewidth=2.0, color="g")
plt.tight_layout()
plt.savefig(fig_dir + "cnmf-boundaries-example.pdf")
plt.show()


Unique segments: 5
Annotated bounds: [  0   0  21  49  87 108 135 157 183 205 287 287]
Annotated labels: [u'silence', u'intro', u'refrain', u'verse', u'refrain', u'verse', u'refrain', u'verse', u'refrain', u'outro_(double_fade-out)', u'silence']

In [47]:
# Labels
print bound_idxs
print ann_labels

def most_frequent(x):
    """Returns the most frequent value in x."""
    return np.argmax(np.bincount(x))

def fill_frames(labels, bounds, N):
    """Fills up the frames with label ids."""
    label_frames = np.zeros(N)
    all_bounds = [0] + bounds.tolist()
    for i, (start, end) in enumerate(zip(all_bounds[:-1], all_bounds[1:])):
        label_frames[start:end] = labels[i]
    label_frames[all_bounds[-1]:] = labels[-1]
    return label_frames

ann_labels = MSAF.read_annot_int_labels(in_path)
print ann_labels

F, G, W = cnmf(X, r+1, niter, fig=fig)
G = G.T
orG = np.copy(G)
idx = np.argmax(G, axis=1)
max_idx = np.arange(G.shape[0])
max_idx = (max_idx, idx.flatten())
G[:, :] = 0
G[max_idx] = idx + 1

# TODO: Order matters?
oG = np.copy(G)
G = np.sum(G, axis=1)
mG = np.copy(G)
G = S.median_filter(G[:, np.newaxis], R-2)

N = len(G.flatten())
label_frames = G.flatten()
label_frames = np.asarray(label_frames, dtype=int)

est_labels = [label_frames[0]]
bound_inters = zip(bound_idxs[:-1], bound_idxs[1:])
for bound_inter in bound_inters:
    if bound_inter[1] - bound_inter[0] <= 0:
        est_labels.append(np.max(label_frames) + 1)
    else:
        est_labels.append(most_frequent(
            label_frames[bound_inter[0]: bound_inter[1]]))
    #print bound_inter, labels[-1]
est_labels.append(label_frames[-1])

label_frames = fill_frames(est_labels, bound_idxs, N)
ann_label_frames = fill_frames(ann_labels, ann_bounds[1:-1], N)

# PLOT
plt.figure(figsize=(10, 5))
plt.subplot(2, 1, 1)
pre_L = np.vstack((old_label_frames, G.T))
plt.imshow(pre_L, interpolation="nearest", aspect="auto", cmap="hot")
for bound_idx in bound_idxs:
    plt.axvline(bound_idx, ymin=0, ymax=1, linewidth=2.0, color="b")
plt.yticks([0,1])
plt.gca().set_yticklabels(["$\mathbf{g}'$", "$\mathbf{g}'_{r'}$"])
plt.xlabel("Time Frames")

plt.subplot(2, 1, 2)
L = np.vstack((label_frames[:, np.newaxis].T, ann_label_frames[:, np.newaxis].T))
plt.imshow(L, interpolation="nearest", aspect="auto", cmap="hot")
plt.yticks([0,1])
plt.gca().set_yticklabels(["Estimated", "Annotated"])
plt.xlabel("Time Frames")

plt.tight_layout()
plt.savefig("cnmf-labels-example.pdf")
plt.show()

print est_labels
print ann_labels


[ 11  23  40  87 102 138 152 181 201 215]
[1, 2, 3, 4, 3, 4, 3, 4, 3, 5, 1]
[1, 2, 3, 4, 3, 4, 3, 4, 3, 5, 1]
[1, 1, 2, 1, 3, 4, 3, 4, 3, 4, 5]
[1, 2, 3, 4, 3, 4, 3, 4, 3, 5, 1]

In [40]:
track_path = "/Users/uri/datasets/Segments/audio/Isophonics_05_-_And_I_Love_Her.mp3"
Xm = read_chroma(track_path, m=9).T
X = read_chroma(track_path, m=1).T

pitches = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F" ,"F#", "G", "G#"]
plt.figure(figsize=(9,6))
plt.subplot(2, 1, 1)
plt.imshow(X, interpolation="nearest", aspect="auto", cmap="hot")
plt.yticks(np.arange(12))
plt.gca().set_yticklabels(pitches)
plt.xlabel("Time Frames")
plt.subplot(2, 1, 2)
plt.imshow(Xm, interpolation="nearest", aspect="auto", cmap="hot")
plt.yticks(np.arange(12))
plt.gca().set_yticklabels(pitches)
plt.xlabel("Time Frames")
plt.tight_layout()
plt.savefig("chromagram-example.pdf")
plt.show()